001 /* 002 * CondorDispatcher.java 003 * 004 * Created on July 17, 2003, 11:17 AM 005 * 006 * This file is part of the STAR Scheduler. 007 * Copyright (c) 2002-2003 STAR Collaboration - Brookhaven National Laboratory 008 * 009 * STAR Scheduler is free software; you can redistribute it and/or modify 010 * it under the terms of the GNU General Public License as published by 011 * the Free Software Foundation; either version 2 of the License, or 012 * (at your option) any later version. 013 * 014 * STAR Scheduler is distributed in the hope that it will be useful, 015 * but WITHOUT ANY WARRANTY; without even the implied warranty of 016 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 017 * GNU General Public License for more details. 018 * 019 * You should have received a copy of the GNU General Public License 020 * along with STAR Scheduler; if not, write to the Free Software 021 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 022 */ 023 package gov.bnl.star.offline.scheduler.condorg; 024 025 import gov.bnl.star.offline.scheduler.*; 026 import gov.bnl.star.offline.scheduler.ComponentLibrary; 027 import gov.bnl.star.offline.scheduler.Dispatchers.lsf.CSHApplication; 028 import gov.bnl.star.offline.scheduler.Dispatchers.lsf.LSFDispatcher; 029 import gov.bnl.star.offline.scheduler.util.CSHCommandLineTask; 030 import gov.bnl.star.offline.scheduler.util.FilesystemToolkit; 031 //import gov.bnl.star.offline.scheduler.util.StatisticsRecorder; //Moved Statistics recording to Scheduler.java LH 032 033 import java.io.File; 034 import java.io.FileOutputStream; 035 import java.io.PrintStream; 036 import java.util.*; 037 038 import java.util.logging.Level; 039 import java.util.logging.Logger; 040 041 042 /** Dispatches jobs using Condor-G on a remote site that uses LSF. It will use some 043 * extra rsl attributes created to command some extra features such as mail 044 * notification, resource usage, job name and target machine. These extra LSF 045 * attribute require a patch to the LSF job manager. 046 * @author Gabriele Carcassi 047 * @version 1.0 2003/07/23 048 */ 049 public class CondorDispatcher extends LSFDispatcher { 050 static private Logger log = Logger.getLogger(CondorGLSFDispatcher.class.getName()); 051 private String condorEx; 052 053 private String condorOptions; 054 055 public void setCondorEx(String condorEx) { 056 this.condorEx = condorEx; 057 } 058 059 public String getCondorEx() { 060 return condorEx; 061 } 062 063 /** Creates a new dispatcher */ 064 public CondorDispatcher() { 065 } 066 067 /** Creates the scripts and dispatches the job on the target machine. 068 * @param request the job request 069 */ 070 public void dispatch(Request request, List jobs) { 071 log.info("Dispatching using Condor: \"" + request.getCommand() + 072 "\""); 073 074 // Enables the simulation mode if necessary 075 useSimulationMode(request.getSimulation()); 076 reportedFailure = false; 077 078 // Submits from the higher to the lower JobID. This way the 079 // user has a feel of when the last job is going to be 080 // submitted 081 for (int nProcess = jobs.size() - 1; nProcess >= 0; 082 nProcess--) { 083 Job job = (Job) jobs.get(nProcess); 084 085 System.out.print("Dispatching process " + 086 job.getJobID() + "."); 087 dispatch(request, job); 088 } 089 090 //StatisticsRecorder.getIntance().recordStatistics(request, jobs); 091 } 092 093 protected void dispatch(Request request, Job job) { 094 application = (CSHApplication) ComponentLibrary.getInstance().getComponent("CSHApplication"); 095 096 // TODO: all the parameters should be passed in one go 097 application.setJob(request, job); 098 application.setScratchDir(scratchDir); 099 application.setSubmissionCommand(getCondorCommand(request, job)); 100 101 application.prepareJob(); 102 prepareClassAd(request, job); 103 104 log.info("Executing \"" + getCondorCommand(request, job) + "\""); 105 106 if (!simulation) { 107 try { 108 Thread.sleep(getMsBtwnSuccess()); 109 } catch (Exception e) { 110 } 111 112 int attempt = 0; 113 boolean success = false; 114 115 while (!success && (attempt < getMaxAttempts())) { 116 try { 117 CSHCommandLineTask task = new CSHCommandLineTask(getCondorCommand( 118 request, job), true, 30000); 119 task.execute(); 120 121 if (task.getExitStatus() != 0) { 122 log.warning("bsub failed: " + task.getOutput()); 123 Thread.sleep(getMsBtwnFailure()); 124 System.out.print("/"); 125 attempt++; 126 } else { 127 success = true; 128 } 129 } catch (Exception e) { 130 log.log(Level.SEVERE, 131 "Couldn't submit the script to Condor-g", e); 132 133 try { 134 Thread.sleep(getMsBtwnFailure()); 135 } catch (Exception e1) { 136 } 137 138 System.out.print("/"); 139 attempt++; 140 } 141 } 142 143 if (success) { 144 System.out.println(" done."); 145 } else { 146 System.out.println(" FAILED!!"); 147 } 148 } else { 149 System.out.println(" simulated."); 150 } 151 } 152 153 /** Returns the command line to submit the job through condor-g. 154 * @param request the request that originated the job 155 * @param job the job to be dispatched 156 * @return the commandline to submit the job 157 */ 158 protected String getCondorCommand(Request request, Job job) { 159 return condorEx + " " + getClassAdName(request, job); 160 } 161 162 /** Returns the name of the file containing the class ad. Class ad is the job 163 * description required by condor to submit a job. 164 * @param request the request that originated the job 165 * @param job the job to be submitted 166 * @return the file name of the class ad 167 */ 168 protected String getClassAdName(Request request, Job job) { 169 return "sched" + job.getJobID() + ".condor"; 170 } 171 private void prepareClassAd(Request request, Job job) { 172 try { 173 PrintStream classAd = new PrintStream(new FileOutputStream( 174 new File(getClassAdName(request, job)))); 175 createClassAd(request, job, classAd); 176 } catch (Exception e) { 177 log.log(Level.SEVERE, "Couldn't create the class ad", e); 178 throw new RuntimeException("Couldn't create the class ad " + 179 getClassAdName(request, job) + ": " + e.getMessage()); 180 } 181 } 182 183 private void createClassAd(Request request, Job job, 184 PrintStream classAd) { 185 classAd.println("Universe = vanilla"); 186 classAd.println(); 187 //classAd.println("+Experiment = \"star\""); 188 classAd.println("Notification = never"); 189 classAd.print("Executable = "); 190 classAd.println(getExecutable()); 191 192 if (getArguments() != null) { 193 classAd.print("Arguments = "); 194 classAd.println(getArguments()); 195 } 196 197 if (application.getStdin() != null) { 198 classAd.print("Input = "); 199 classAd.println(application.getStdin()); 200 } 201 202 if (application.getStdout() != null) { 203 classAd.print("Output = "); 204 classAd.println(application.getStdout()); 205 } 206 207 if (application.getStderr() != null) { 208 classAd.print("Error = "); 209 classAd.println(application.getStderr()); 210 } 211 212 classAd.print("Log = "); 213 classAd.println(getLogName(job)); 214 classAd.println("Getenv = true"); 215 216 if (getRemoteDirectory() != null) { 217 classAd.print("Initialdir = "); 218 classAd.println(getRemoteDirectory()); 219 } 220 221 if (getCondorOptions() != null) { 222 classAd.println(getCondorOptions()); 223 } 224 225 // classAd.println("transfer_executable = false"); 226 classAd.println("Queue"); 227 } 228 229 private String getExecutable() { 230 if (application.getCommandLine().indexOf(' ') == -1) { 231 return application.getCommandLine(); 232 } 233 234 return application.getCommandLine().substring(0, 235 application.getCommandLine().indexOf(' ')); 236 } 237 238 private String getArguments() { 239 if (application.getCommandLine().indexOf(' ') == -1) { 240 return null; 241 } 242 243 return application.getCommandLine().substring(application.getCommandLine() 244 .indexOf(' ') + 245 1); 246 } 247 248 private String getLogName(Job job) { 249 // TODO maybe log filename should be put as a general property of Process (as stds) 250 return "sched" + job.getJobID() + ".condor.log"; 251 } 252 253 /* private String getGlobusScheduler() { 254 //TODO make it flexible 255 return "stargrid01.rcf.bnl.gov/jobmanager-lsf"; 256 }*/ 257 258 private String getRemoteDirectory() { 259 // TODO this has to be specified better: remote execution directory could be different from scheduler execution directory 260 return FilesystemToolkit.getCurrentDirectory(); 261 } 262 263 /** Getter for property condorOptions. 264 * @return Value of property condorOptions. 265 * 266 */ 267 public String getCondorOptions() { 268 return this.condorOptions; 269 } 270 271 /** Setter for property condorOptions. 272 * @param condorOptions New value of property condorOptions. 273 * 274 */ 275 public void setCondorOptions(String condorOptions) { 276 this.condorOptions = condorOptions; 277 } 278 279 /* protected String getResourceUsageSwitch(Process job) { 280 String res = super.getResourceUsageSwitch(job); 281 282 return res.replaceAll("\"", "\\\\\""); 283 }*/ 284 }